home *** CD-ROM | disk | FTP | other *** search
- /*
- * ksort.c
- * Copyright (c) 1989 Josh Greifer
- * sort big text files that DOS sort.com can't cope with
- * over 30 times faster than DOS sort.com
- * try it on huge files!
- *
- * compile with Microsoft C V. 6.0 like this:
- * (note that the Compact Model is used)
- * cl /AC /Ox ksort.c
- * or use the makefile provided:
- * make ksort.mak
- */
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <malloc.h>
- #include <ctype.h>
-
- #define MAXLINELEN 1024
- #define DEFARRAYSIZE 16000
- #define MAXARRAYSIZE 64000
- #define MAXFILES FOPEN_MAX-5
- #define EMBUFSIZE 4096
- #define QSSSIZE 1024
-
- #define DUMMY1ST ((char *)-1)
- #define DUMMYLAST ((char *)-2)
-
- FILE *inf, *ouf;
- int _near offset = 1;
- int _near skipspace;
- int _near rename_;
- int _near verbose;
- unsigned int _near asize;
- unsigned int _near insert;
-
- char *infname, *oufname;
-
- typedef struct {
- FILE *fp; /* file pointer */
- char *fn; /* file name */
- char *buf; /* buffer */
- } FS;
-
- FS _near tmpftab[MAXFILES];
-
- int _near tmpfiles; /* number of temp files created */
-
- void parseargs(int, char **);
- void error(char *, char *);
- int revcmpi(char *, char *);
- int revcmp(char *, char *);
- void merge(void);
- void spool_to_tmpfile( char * _huge *array );
-
- void ksort( char * _huge *, unsigned int, int (*)( char *, char *));
- void isort( char * _huge *, unsigned int, int (*)( char *, char *));
-
- int (*cmpfns[])(char *, char *) = { stricmp, strcmp };
- int (*revcmpfns[])(char *, char *) = { revcmpi, revcmp };
- int (**cmpfn)(char *, char *) = cmpfns;
-
- void getnextline( FS *f );
-
- unsigned int _near * _near qs; /* quicksort stack bottom */
- char _near *qsss = "QuickSort internal stack";
- char *emergency; /* freed when malloc() fails */
- char * _huge *array;
-
- main(int argc, char **argv)
- {
- char buf[MAXLINELEN];
- char *p = buf;
- register unsigned int i=0, j=0;
- long l = 0L;
-
- tmpftab[0].fp = (FILE *)1; /* pseudo file */
-
- emergency = malloc(EMBUFSIZE);
- parseargs(argc, argv);
-
- if ((array = (char *_huge *)halloc((asize+1),sizeof(char *))) == NULL)
- error("Default buffer too big - use /B to specify size", NULL);
-
- if ((qs = (int _near *)_nmalloc(QSSSIZE*sizeof(int)))==0)
- error("Can't allocate %s\n", qsss);
- loop:
- if (verbose) fprintf(stderr, "\nReading file...");
- if (verbose == 2) fprintf(stderr, "\nReading line %ld", l);
-
- for (i = 1; i < asize && fgets(buf, MAXLINELEN, inf); i++, l++) {
- if (verbose == 2) fprintf(stderr, "\rReading line %ld", l);
- if (skipspace) for (p = buf; isspace(*p); p++);
- if ((array[i] = strdup(p)) == NULL) {
- free(emergency);
- emergency = NULL;
- array[i++] = strdup(p);
- break;
- }
- }
- if (verbose) fprintf(stderr, "\nSorting...");
- array[0] = DUMMY1ST;
- array[i] = DUMMYLAST;
-
- ksort(array, i-1, *cmpfn);
-
- if (i == asize || !emergency) {
-
- spool_to_tmpfile(array);
-
- if (!emergency) emergency = malloc(EMBUFSIZE);
- goto loop;
- }
- else {
- if (tmpfiles) merge();
- else {
- if (verbose) fprintf(stderr, "\nOutputting sorted file...");
- for (j = 1; j < i; j++)
- fputs(array[j], ouf);
- }
- }
- fclose(inf);
- fclose(ouf);
- if (rename_) {
- unlink(infname);
- rename(oufname, infname);
- }
- return 0;
- }
-
- void spool_to_tmpfile( char * _huge *array )
- {
- char buf[FILENAME_MAX];
-
- FS *tmpfs = &tmpftab[++tmpfiles];
-
- unsigned int j;
-
- if (verbose)
- fprintf(stderr,"\nWriting temp file %d...",tmpfiles);
-
- sprintf(buf, "SRTQ_%d.$$$", tmpfiles);
- tmpfs->fn = strdup(buf);
- if ((tmpfs->fp = fopen(tmpfs->fn, "w+"))==0)
- error("Error creating temp file %s", tmpfs->fn);
- if ((tmpfs->buf = (char *)malloc(MAXLINELEN)) == 0)
- error("Can't allocate temp file buffer.\n", NULL);
-
- for (j = 1; array[j] != DUMMYLAST; j++) {
- fputs(array[j], tmpfs->fp);
- if (array[j] != DUMMY1ST) free(array[j]);
- }
- }
-
- void error(char *message, char *var)
- {
- if (emergency) free(emergency);
- if (*message != '\n') fprintf(stderr, "\nFatal error:");
- fprintf(stderr, message, var);
- exit(1);
- }
-
-
- void merge()
- {
- register FS *i,*j;
- FS *maxtmpf = &tmpftab[tmpfiles+1];
-
- if (verbose) fprintf(stderr, "\nMerging %d temp file%c..", tmpfiles,
- tmpfiles > 1 ? 's' : '.');
-
- for (i = tmpftab+1; i < maxtmpf; i++) {
- fflush(i->fp);
- fseek(i->fp,0L,SEEK_SET);
- }
-
- for (i = tmpftab; i < maxtmpf; i++)
- if (i->fp) /* tmpftab[m].fp set to NULL after EOF */
- getnextline(i);
- if (verbose) fprintf(stderr, "\nOutputting sorted file...");
- for (;;) {
- for (j = tmpftab; !j->fp; j++) /* find a buffer */
- if (j >= maxtmpf) return; /* EOF on all temp files */
- for (i = j+1; i < maxtmpf; i++) /* compare all other bufs with j */
- if (i->fp)
- if ((*cmpfn)(j->buf+offset, i->buf+offset) > 0)
- j = i; /* j := next buffer to be output */
- fputs(j->buf, ouf);
- getnextline(j);
- }
- }
-
- void getnextline( register FS *f )
- {
- static int pos;
- if (f == tmpftab) { /* pseudo-file -- data is in array[] */
- if ((f->buf = array[++pos]) == DUMMYLAST) {
- f->fp = NULL;
- f->buf = "";
- }
- }
- else if (fgets(f->buf, MAXLINELEN, f->fp)==NULL) {
- fclose(f->fp);
- f->fp = NULL;
- unlink(f->fn);
- }
- }
-
- #define CLEARSTACK() stackptr = qs; stackend = stackptr+QSSSIZE-2
- #define STACKEMPTY() (stackptr == qs)
- #define PUSH(a,b) if (stackptr >= stackend) error("%v overflow", qsss); \
- else *stackptr++ = (a); *stackptr++ = (b)
- #define POP(a,b) (b) = *--stackptr; (a) = *--stackptr
-
- #define SWAP(Km, Kn) temp = (Km); (Km) = (Kn); (Kn) = temp
-
- #define LESSTHAN(Km, Kn) ((Km) == DUMMY1ST ? 1 : \
- (Kn) == DUMMY1ST ? 0 : (Km) == DUMMYLAST ? 0 : \
- (Kn) == DUMMYLAST ? 1 : (*comp)( (Km)+offset, (Kn)+offset) < 0)
-
- #define GREATERTHAN(Km, Kn) ((Km) == DUMMY1ST ? 0 : \
- (Kn) == DUMMY1ST ? 1 : (Km) == DUMMYLAST ? 1 : \
- (Kn) == DUMMYLAST ? 0 : (*comp)( (Km)+offset, (Kn)+offset) > 0)
-
- void ksort( char * _huge *v, unsigned int N, int (*comp)( char *, char *))
- {
- unsigned int l, r, i, j;
- char *temp;
- unsigned int _near *stackptr;
- unsigned int _near *stackend;
- unsigned int M = insert;
- unsigned int p = 0;
-
- /* Q1: Initialize */
- if (N <= M) goto Q9;
- if (verbose) fprintf(stderr, "\nQuickSort...%c",
- verbose == 2 ? '\n' : '\0');
- CLEARSTACK();
- l = 1; r = N;
-
- Q2: /* Begin new stage */
- if (verbose == 2) fprintf(stderr, "\r%u partitions processed",++p);
- i = l; j = r+1;
-
- Q3: /* Compare Ki : K */
- ++i;
- if (LESSTHAN(v[i], v[l]))
- goto Q3;
-
- Q4: /* Compare K : Kj */
- --j;
- if (LESSTHAN(v[l], v[j]))
- goto Q4;
-
- /* Q5: Test i : j */
- if (j <= i) {
- SWAP(v[l], v[j]);
- goto Q7;
- }
-
- /* Q6: Exchange */
- SWAP(v[i], v[j]);
- goto Q3;
-
- Q7: /* Put on stack */
- if ((r-j >= j-l) && (j-l > M) ) {
- PUSH(j+1, r);
- r = j-1;
- goto Q2;
- }
- if ((j-l > r-j) && (r-j > M)) {
- PUSH(l, j-1);
- l = j+1;
- goto Q2;
- }
- if ((r-j > M) && (M >= j-l)) {
- l = j+1;
- goto Q2;
- }
- if ((j-l > M) && (M >= r-j) ) {
- r = j-1;
- goto Q2;
- }
-
- /* Q8: take off stack */
- if (!STACKEMPTY()) {
- POP(l, r);
- goto Q2;
- }
- Q9: /* Straight insertion sort */
- if (M > 1) isort(v, N, comp);
- }
-
- void isort( char * _huge *v, unsigned int N, int (*comp)( char *, char *))
- {
- unsigned int i, j;
- char *K;
- if (verbose) fprintf(stderr, "\nInsertion sort...%c",
- verbose == 2 ? '\n' : '\0');
- for (j = 2; j <= N; j++) {
- if (verbose == 2) fprintf(stderr, "\r%u records processed",j-1);
- if (GREATERTHAN(v[j-1], v[j])) {
- K = v[j];
- i = j-1;
- do {
- v[i+1] = v[i];
- i--;
- } while (GREATERTHAN(v[i], K));
- v[i+1] = K;
- }
- }
- }
-
- int revcmpi( register char *a1, register char *a2)
- {
- return strcmpi(a2, a1);
- }
-
- int revcmp( register char *a1, register char *a2)
- {
- return strcmp(a2, a1);
- }
-
- void parseargs(int argc, char **argv)
- {
- static char usage[] = "\n\
- *** Ksort version 1.03 Copyright (c) Josh Greifer 1991 *** \n\
- Usage: ksort [/R] [/+<n>] [/N] [/S] [/V|v] [/B<n>] [/I<n>] [infile] [outfile]\n\
- options: R - reverse sort\n\
- +<column> - start sort at column <n>\n\
- N - Don't ignore case\n\
- S - Skip and trim leading white space\n\
- v - verbose -- screen messages\n\
- V - Very verbose -- more screen messages\n\
- B<n> - Buffer size (number of lines) -- max %u\n\
- I<n> - Use Insertion Sort if fewer than <n> lines";
- register char *c;
- while (--argc) {
- c = *++argv;
- if (*c == '/')
- switch (*++c) {
- case 's':
- case 'S': skipspace++; break;
- case 'v': verbose=1; break;
- case 'V': verbose=2; break;
- case 'r':
- case 'R': cmpfn = revcmpfns; break;
- case 'n':
- case 'N': cmpfn++; break;
- case '+': offset = atoi(++c); break;
- case 'b':
- case 'B': asize = atoi(++c); break;
- case 'i':
- case 'I': insert = atoi(++c); break;
- default: error(usage, (char *)MAXARRAYSIZE);
- }
- else if (!inf) {
- infname = c;
- if ((inf = fopen(infname, "r")) == NULL)
- error("\nCan't open input file",infname);
- }
- else if (!ouf) {
- oufname = c;
- if ((ouf = fopen(oufname, "w")) == NULL)
- error("\nCan't open output file",oufname);
- }
- else error(usage, NULL);
- }
- if (!ouf)
- if (inf) {
- ouf = fopen(oufname = strdup(tmpnam(NULL)), "w");
- rename_ = 1;
- } else {
- inf = stdin;
- ouf = stdout;
- }
-
- offset--;
- if (asize <= 1) asize = DEFARRAYSIZE;
- if (insert <= 1) insert = 1;
- if (insert >= asize) insert = asize;
- }